In [1]:
import os

import numpy as np
import pandas as pd
# Load the transactions CSV into a DataFrame.
# The location defaults to the original local path but can be overridden with
# the ONLINEFRAUD_CSV environment variable, so the notebook can run on another
# machine without editing this cell.
DATA_PATH = os.environ.get(
    "ONLINEFRAUD_CSV",
    "C:/Users/DELL/OneDrive/Projects/Online Payment Fraud Detection/onlinefraud.csv",
)
data = pd.read_csv(DATA_PATH)
In [4]:
# Preview the first five rows of the loaded table
data.head(n=5)
Out[4]:
step type amount nameOrig oldbalanceOrg newbalanceOrig nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud
0 1 PAYMENT 9839.64 C1231006815 170136.0 160296.36 M1979787155 0.0 0.0 0 0
1 1 PAYMENT 1864.28 C1666544295 21249.0 19384.72 M2044282225 0.0 0.0 0 0
2 1 TRANSFER 181.00 C1305486145 181.0 0.00 C553264065 0.0 0.0 1 0
3 1 CASH_OUT 181.00 C840083671 181.0 0.00 C38997010 21182.0 0.0 1 0
4 1 PAYMENT 11668.14 C2048537720 41554.0 29885.86 M1230701703 0.0 0.0 0 0
In [5]:
# Count the missing values in each column
missing_per_column = data.isna().sum()
print(missing_per_column)
step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64
In [6]:
# Explore how often each transaction type occurs
data["type"].value_counts()
Out[6]:
type
CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: count, dtype: int64
In [7]:
# Frequency of each transaction type.
# NOTE: the name `type` shadows Python's built-in `type()` for the rest of the
# session; it is kept because the next cell reads `type.index` / `type.values`.
type = data.loc[:, "type"].value_counts()
print(type)
type
CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: count, dtype: int64
In [8]:
# Split the counts into chart inputs: labels (the index) and sizes (the values)
transaction, quantity = type.index, type.values
In [9]:
import plotly.express as px

# Donut chart showing the share of each transaction type.
# `transaction` (labels) and `quantity` (counts) are passed directly as
# array-likes, so the full `data` frame is not needed as `data_frame`.
figure = px.pie(
    values=quantity,
    names=transaction,
    hole=0.5,  # a non-zero hole renders the pie as a donut
    title="Distribution of Transaction Type",
)
figure.show()
In [10]:
# Pairwise Pearson correlation between the float64/int64 columns
numeric_cols = data.select_dtypes(include=["float64", "int64"])
correlation = numeric_cols.corr(method="pearson")
print(correlation)
                    step    amount  oldbalanceOrg  newbalanceOrig  \
step            1.000000  0.022373      -0.010058       -0.010299   
amount          0.022373  1.000000      -0.002762       -0.007861   
oldbalanceOrg  -0.010058 -0.002762       1.000000        0.998803   
newbalanceOrig -0.010299 -0.007861       0.998803        1.000000   
oldbalanceDest  0.027665  0.294137       0.066243        0.067812   
newbalanceDest  0.025888  0.459304       0.042029        0.041837   
isFraud         0.031578  0.076688       0.010154       -0.008148   
isFlaggedFraud  0.003277  0.012295       0.003835        0.003776   

                oldbalanceDest  newbalanceDest   isFraud  isFlaggedFraud  
step                  0.027665        0.025888  0.031578        0.003277  
amount                0.294137        0.459304  0.076688        0.012295  
oldbalanceOrg         0.066243        0.042029  0.010154        0.003835  
newbalanceOrig        0.067812        0.041837 -0.008148        0.003776  
oldbalanceDest        1.000000        0.976569 -0.005885       -0.000513  
newbalanceDest        0.976569        1.000000  0.000535       -0.000529  
isFraud              -0.005885        0.000535  1.000000        0.044109  
isFlaggedFraud       -0.000513       -0.000529  0.044109        1.000000  
In [11]:
# Rank the numeric columns by their correlation with the fraud flag, highest first
correlation.loc[:, "isFraud"].sort_values(ascending=False)
Out[11]:
isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64
In [12]:
# Encode the transaction type as an integer code.
# Guarded so the cell is safe to re-run: once the column is numeric, mapping it
# again would find no matching string keys and turn every value into NaN.
type_codes = {"CASH_OUT": 1, "PAYMENT": 2, "CASH_IN": 3, "TRANSFER": 4, "DEBIT": 5}
if not pd.api.types.is_numeric_dtype(data["type"]):
    data["type"] = data["type"].map(type_codes)
In [13]:
# Replace the 0/1 fraud flag with readable labels.
# Guarded so the cell is safe to re-run: once the column holds strings, mapping
# it again would find no matching 0/1 keys and turn every label into NaN.
fraud_labels = {0: "no fraud", 1: "fraud"}
if pd.api.types.is_numeric_dtype(data["isFraud"]):
    data["isFraud"] = data["isFraud"].map(fraud_labels)
data.head(5)
Out[13]:
step type amount nameOrig oldbalanceOrg newbalanceOrig nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud
0 1 2 9839.64 C1231006815 170136.0 160296.36 M1979787155 0.0 0.0 no fraud 0
1 1 2 1864.28 C1666544295 21249.0 19384.72 M2044282225 0.0 0.0 no fraud 0
2 1 4 181.00 C1305486145 181.0 0.00 C553264065 0.0 0.0 fraud 0
3 1 1 181.00 C840083671 181.0 0.00 C38997010 21182.0 0.0 fraud 0
4 1 2 11668.14 C2048537720 41554.0 29885.86 M1230701703 0.0 0.0 no fraud 0
In [14]:
# Build the model inputs: feature matrix `x` and label array `y`
from sklearn.model_selection import train_test_split

feature_columns = ["type", "amount", "oldbalanceOrg", "newbalanceOrig"]
label_columns = ["isFraud"]  # selecting with a list keeps y two-dimensional, shape (n, 1)
x = np.array(data[feature_columns])
y = np.array(data[label_columns])
In [15]:
from sklearn.tree import DecisionTreeClassifier
In [20]:
# Hold out 10% of the rows for evaluation (fixed seed for a repeatable split)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42)

# Seed the tree as well: scikit-learn permutes the features at each split, so
# without random_state the fitted tree (and its score) can differ between runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(xtrain, ytrain)

# Mean accuracy on the held-out rows.
# NOTE(review): if the fraud label is heavily imbalanced, accuracy alone can
# look high while fraud recall is poor — consider precision/recall as well.
print(model.score(xtest, ytest))
0.9997359578286932
In [22]:
# Predict the label for one hand-written transaction.
# Values follow the training column order: type, amount, oldbalanceOrg,
# newbalanceOrig (type code 4 is TRANSFER in the encoding used above).
sample_row = [4, 9000.0, 9000.0, 0.0]
features = np.array([sample_row])
print(model.predict(features))
['fraud']
In [ ]: